@@ -61,7 +61,10 @@ module Agents
     end

     def validate_options
-      errors.add(:base, "url, expected_update_period_in_days, and extract are required") unless options[:expected_update_period_in_days].present? && options[:url].present? && options[:extract].present?
+      errors.add(:base, "url and expected_update_period_in_days are required") unless options[:expected_update_period_in_days].present? && options[:url].present?
+      if !options[:extract].present? && options[:type] != "json"
+        errors.add(:base, "extract is required for all types except json")
+      end
     end

     def check
@@ -74,45 +77,54 @@ module Agents
        request.on_success do |response|
          doc = parse(response.body)
          output = {}
-          options[:extract].each do |name, extraction_details|
-            result = if extraction_type == "json"
-              output[name] = Utils.values_at(doc, extraction_details[:path])
-            else
-              output[name] = doc.css(extraction_details[:css]).map { |node|
-                if extraction_details[:attr]
-                  node.attr(extraction_details[:attr])
-                elsif extraction_details[:text]
-                  node.text()
-                else
-                  error ":attr or :text is required on HTML or XML extraction patterns"
-                  return
-                end
-              }
-            end
-            log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
-          end
-
-          num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
-
-          if num_unique_lengths.length != 1
-            error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
-            return
-          end
-
          previous_payloads = events.order("id desc").limit(UNIQUENESS_LOOK_BACK).pluck(:payload).map(&:to_json) if options[:mode].to_s == "on_change"
-          num_unique_lengths.first.times do |index|
-            result = {}
-            options[:extract].keys.each do |name|
-              result[name] = output[name][index]
-              if name.to_s == 'url'
-                result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
-              end
-            end

+          if extraction_type == "json" && !options[:extract].present?
+            result = doc
            if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
              log "Storing new result for '#{name}': #{result.inspect}"
              create_event :payload => result
            end
+          else
+            options[:extract].each do |name, extraction_details|
+              result = if extraction_type == "json"
+                output[name] = Utils.values_at(doc, extraction_details[:path])
+              else
+                output[name] = doc.css(extraction_details[:css]).map { |node|
+                  if extraction_details[:attr]
+                    node.attr(extraction_details[:attr])
+                  elsif extraction_details[:text]
+                    node.text()
+                  else
+                    error ":attr or :text is required on HTML or XML extraction patterns"
+                    return
+                  end
+                }
+              end
+              log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
+            end
+
+            num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
+
+            if num_unique_lengths.length != 1
+              error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
+              return
+            end
+
+            num_unique_lengths.first.times do |index|
+              result = {}
+              options[:extract].keys.each do |name|
+                result[name] = output[name][index]
+                if name.to_s == 'url'
+                  result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
+                end
+              end
+
+              if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
+                log "Storing new result for '#{name}': #{result.inspect}"
+                create_event :payload => result
+              end
+            end
          end
        end
        hydra.queue request
@@ -155,6 +155,31 @@ describe Agents::WebsiteAgent do
        event.payload[:version].should == 2
        event.payload[:title].should == "first"
      end
+
+      it "stores the whole object if :extract is not specified" do
+        json = {
+          :response => {
+            :version => 2,
+            :title => "hello!"
+          }
+        }
+        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
+        site = {
+          :name => "Some JSON Response",
+          :expected_update_period_in_days => 2,
+          :type => "json",
+          :url => "http://json-site.com",
+          :mode => :on_change
+        }
+        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
+        checker.user = users(:bob)
+        checker.save!
+
+        checker.check
+        event = Event.last
+        event.payload[:response][:version].should == 2
+        event.payload[:response][:title].should == "hello!"
+      end
    end
  end
end